home *** CD-ROM | disk | FTP | other *** search
- /*
- 11 August 1995.
-
- Submission to MacTech Programmer's challenge August 1995.
-
- Copyright 1995, Ernst Munter, Kanata, ON, Canada.
-
- The problem and the solution:
- ---------------------------------
-
- Given two texts, compute a series of insert/delete/move
- instructions that will describe the conversion of one
- text into the other.
-
- I parse the old text, and create a sequential word list.
- The words are further linked into lists, accessed through
- a hash table.
-
- Then, I parse the second text, and for each word in the
- second text, I try to find a matching word in the first
- text. The hashed table of lists helps me find the first
- matching word quickly. I remove it from the list, and
- mark it for a potential move operation.
-
- Each matching word found in the first text may either
- remain in place, or must be moved.
-
- There exists an algorithm to determine the best set
- of words (most characters) to leave undisturbed, and so
- minimize the amount to be moved. However, I did not have
- the patience to work this out fully.
-
- So, as a compromise solution, I just go sequentially
- through the texts together, and make an educated guess
- for each block of contiguous matching words, to decide
- whether to leave or move them.
-
- Any words encountered in the new text that are not found
- in the old text are immediately marked for "insert".
- After the whole text is scanned, any words or blocks
- left behind in the hash lists, are destined for deletion.
- Before the parsing, a character by character comparison
- of the two texts is done from each end, to cut off all
- text which is equal and does not need any further
- analysis. This may result in the discovery that both
- texts are identical. All other special cases, such as
- only a single change at one end or in the middle of one
- of the texts, will be automatically handled.
-
- Before returning the DiffRecs to the caller, I analyze
- pairs of inserts and deletes to eliminate redundancies.
-
- The program will allocate a fair amount of memory to
- build the word lists in. This can be minimized by
- doing a word count first. But I prefer to just provide
- a conservative factor (4 chars per word, white space
- included) which should be enough for normal texts.
- Please use the "countWords" directive if memory must
- be conserved.
-
- I have allocated a fixed static hash table of 997
- entries. With 65000 bytes, and say, 6.5 bytes per
- word really, each list would contain 10 entries on
- the average. hashMod can easily be increased to
- some larger prime number to speed up word lookup for
- large files.
-
- I have also provided a lookup table for determining
- what characters are considered parts of words, and
- which are white space. Include foreign letters and
- digits if that is desirable. If the texts are
- computer programs, underscore and other symbols
- might be included in "alpha", depending on the language.
-
- */
-
- #include <stdio.h>
- #include <stdlib.h>
-
- #define ulong unsigned long
- #define ushort unsigned short
-
- typedef enum {
- deletedText=0, insertedText, movedText } DiffType;
-
- typedef struct {
- DiffType type;
- ulong rangeStart;
- ulong rangeEnd;
- ulong position;
- } DiffRec;
-
- short FindWordDifferences (
- char *oldText,
- char *newText,
- ulong numOldChars,
- ulong numNewChars,
- DiffRec diffs[],
- ulong maxDiffRecs);
-
- #define countWords 0
-
- #if countWords
- /*use the actual word count to reserve Snippet space*/
- #else
- /*use a very conservative estimate to reserve enough*/
- #define averageWordSize 4
- #endif
-
- #define hashMod 997 //any reasonable prime number
-
- #define inDelete 0 //state values during text scan
- #define inMove 1
- #define inInsert 2
- #define inLimbo 3
-
- #define nullRec 3 //used to mark redundant DiffRecs
-
-
- /*A word is defined as a contiguous sequence of letters
- from the set defined in the lookup table.
-
- The table defaults to 'A'-'Z','a'-'z', but foreign
- characters or digits are easily included if desired.
- */
-
- #define includeForeign 0
- #define includeDigits 0
-
- static char lookup[256] = {
- 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, //control
- 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, //control
- 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, //punctuation
- #if includeDigits
- 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, //digits plus
- #else
- 1,1,1,1,1,1,1,1, 1,1,0,0,0,0,0,0, //digits plus
- #endif
- 0,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, //caps A-O
- 1,1,1,1,1,1,1,1, 1,1,1,0,0,0,0,0, //caps P-Z
- 0,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, //small a-o
- 1,1,1,1,1,1,1,1, 1,1,1,0,0,0,0,0, //small p-z
- #if includeForeign
- 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, //foreign caps
- 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, //foreign small
- 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, //symbols
- 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, //symbols
- 0,0,0,0,0,0,0,0, 0,0,0,1,1,1,1,1, //symbols plus
- 0,0,0,0,0,0,0,0, 1,1,0,0,0,0,0,0, //symbols plus
- #else
- 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, //foreign caps
- 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, //foreign small
- 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, //symbols
- 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, //symbols
- 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, //symbols plus
- 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, //symbols plus
- #endif
- 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, //graphic chars
- 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0 //graphic chars
- };
-
- #define isAlpha(x) lookup[x]
-
- /*A snippet may describe a word or a block of words
- in the old text and includes the white space.
- The reference to the new text is used to track
- the equivalent position in the new text.
- */
- typedef struct Snippet {
- DiffType type;
- char* oldTextRef;
- char* newTextRef;
- ulong length;
- void* next;
- } Snippet;
-
- /*The hashTable is an array of lists where each list
- contains the snippets (words or blocks) from the
- old text which hash to the same value, in the order
- in which they occured.
- */
- typedef struct List {
- Snippet* first;
- Snippet* last;
- } List;
-
- static List hashTable[hashMod];
- static Snippet* snippetStore; //allocated as needed
- static ushort numSnippets;
- static ushort nextFreeSnippet;
-
-
- /*The hash function is used to distribute different
- snippets into different lists as evenly as possible.
- */
- ushort Hash(char* text,ulong numChars)
- {
- register ulong accumulator=numChars;
- if (numChars>6) numChars=6;
- switch (numChars) {
- case 6:accumulator=(accumulator<<5)+*text++;
- case 5:accumulator=(accumulator<<5)+*text++;
- case 4:accumulator=(accumulator<<5)+*text++;
- case 3:accumulator=(accumulator<<5)+*text++;
- case 2:accumulator=(accumulator<<5)+*text++;
- case 1:accumulator=(accumulator<<5)+*text++;
- case 0:;
- }
- return accumulator % hashMod;
- }
-
- /*Clears all snippets to 0*/
- void ClearSnippetStore()
- {
- Snippet* s=snippetStore;
- ushort n=numSnippets-1;
- s->type=deletedText;
- s->oldTextRef=0;
- s->newTextRef=0;
- s->length=0;
- s->next=0;
- s++;
- while (n--) *s++=*snippetStore;
- }
-
- /*Preallocates an array of snippets to handle oldText.
- All snippets are initially marked deletedText.
- */
- void GetSnippetStore(ulong snippetsAllocated)
- {
- ulong memoryRequired;
- if (snippetsAllocated>65535) snippetsAllocated=65535;
- numSnippets=(ushort)snippetsAllocated;
- memoryRequired=numSnippets*sizeof(Snippet);
- snippetStore=(Snippet*)malloc(memoryRequired);
- ClearSnippetStore();
- nextFreeSnippet=0;
- }
-
- /*Assign the next snippet from the preallocated array.*/
- Snippet* NewSnippet(char* text)
- {
- register Snippet* s;
- s=&(snippetStore[nextFreeSnippet++]);
- s->oldTextRef=text;
- return s;
- }
-
- /*Consecutive Snippets are merged into larger blocks*/
- int ExtendSnippet(Snippet* oldS,Snippet* s)
- {
- if (oldS->oldTextRef+oldS->length==s->oldTextRef) {
- oldS->length+=s->length;
- s->length=0;
- return 1;
- }
- return 0;
- }
-
- /*Snippets are attached at end of a list (hashTable[])*/
- void Record(Snippet* s)
- { register List* list=
- &(hashTable[Hash(s->oldTextRef,s->length)]);
- register Snippet* lastS=list->last;
- if (lastS) lastS->next=s;
- else list->first=s;
- list->last=s;
- }
-
- /*Matches two substrings of length numChars*/
- int Match(char* text0,char* text1,ulong numChars)
- {
- while (numChars--) {
- if (*text0!=*text1) return 0;
- text0++;
- text1++;
- }
- return 1;
- }
-
- /*Snippets of the oldText are matched against a word from
- newText. The first matching word is removed from the
- hash list.
- */
- Snippet* FindAndRemoveSnippet(char* text,ulong numChars)
- {
- List* list=&(hashTable[Hash(text,numChars)]);
- Snippet* s=list->first;
- Snippet* father=0;
- while (s) {
- if ((s->length==numChars)
- && Match(s->oldTextRef,text,numChars)) break;
- father=s;
- s=(Snippet*)(s->next);
- }
- if (father) {
- father->next=s->next;
- if (list->last==s) list->last=father;
- } else list->first=list->last=0;
- return s;
- }
-
- /*The following macros set up the 3 different types of
- DiffRecs.
- */
- #define StartMoveRecord \
- { diffPtr->type=movedText; \
- diffPtr->rangeStart=block->oldTextRef-oldText; \
- diffPtr->rangeEnd=diffPtr->rangeStart+block->length-1;\
- diffPtr->position=startOfText->oldTextRef+ \
- startOfText->length-oldText; \
- }
-
- #define StartInsertRecord \
- { diffPtr->type=insertedText; \
- diffPtr->rangeStart=text-newText; \
- diffPtr->rangeEnd=diffPtr->rangeStart+wordLength-1; \
- diffPtr->position=startOfText->oldTextRef+ \
- startOfText->length-oldText; \
- }
-
- #define StartDeleteRecord \
- { diffPtr->type=deletedText; \
- diffPtr->rangeStart=block->oldTextRef-oldText; \
- diffPtr->rangeEnd=diffPtr->rangeStart+block->length-1;\
- diffPtr->position=block->newTextRef-newText; \
- }
-
- /*The following macro marks all words from startOfText
- to "toHere" as potential candidates for delete.
- Any of the words will be overwritten and become movedText
- if they end up being matched in newText later.
- */
- #define MarkDeletedWords(toHere) \
- { canDel=startOfText; \
- while (++canDel<toHere) \
- if (canDel->type==deletedText) \
- canDel->newTextRef=markNew; \
- }
-
- /*
- A matching word may be found in the old text at a point
- far beyond the current insertion point. It would be
- a shame to declare this word the new insertion point,
- and then have to move all intervening words (yet to
- be matched). It is better to move the smaller block
- or word up. This macro is a heuristic attempt to
- make a sensible guess as to which block is larger,
- the matching word (100%) or the intervening text
- (adjusted according to the ratio of remaining chars
- still required for matching, at 50%).
- */
- #define BestGuess \
- (skippedChars*(remainingNew>>1)> \
- block->length*remainingOld)
-
- /*Words are extracted by first scanning through any
- preceding white space, then by scanning through the
- alpha characters, until another white space occurs.
-
- It is assumed that \x0 does not occur as part of either
- text. The last character in the text is then temporarily
- set to 0 so we easily find the end of text.
- */
- #define NextWord \
- { while ((*t)&&(0==isAlpha(*t))) t++; \
- while (0!=isAlpha(*t)) t++; \
- }
-
- #define EmitRecord \
- { diffPtr++; \
- if (diffPtr-diffs>=maxDiffRecs) return maxDiffRecs; \
- }
-
- #define ClearHashTable \
- { ushort n=hashMod; \
- List* H=hashTable; \
- while (n--) { \
- H->first=0; \
- H->last=0; \
- H++; \
- } \
- }
-
- /*The function ParseOldText scans the oldText and creates
- an array of snippets, where each snippet corresponds to
- one word. In addition, each snippet is linked into a
- list, where each list is headed by a List pointer in
- hashTable.
- The last character of the text is temporarily set to 0
- as an end-of-text marker.
- */
- void ParseOldText(
- char* oldText,
- ulong numSameHead,
- ulong numChars)
- {
- Snippet* s;
- char* text=oldText+numSameHead;
- char* t;
- char lastChar=text[numChars-1]; //save it
- text[numChars-1]=0;
- ClearHashTable;
- t=text;
- NewSnippet(text);
- //A null snippet anchors the start of the text.
- do {
- NextWord;
- s=NewSnippet(text);
- s->length=t-text;
- if (0==*t) break;
- Record(s);
- text=t;
- } while (1);
- *t=lastChar; //restore the last char
- s->length++;
- Record(s);
- }
-
- /* The ParseNewText function scans the newText and tries
- to match it with the oldText.
-
- It proceeds by isolating words, and matching them
- against previously created Snippets (see ParseOldText).
-
- It is a state machine which tries to agglomerate
- contiguous blocks of words while it is in either
- the insert or move state. When it switches states
- it creates a DiffRec for the preceding block.
-
- It moves the "startOfText" in the oldText along as it
- scans the newText and encounters matching blocks.
-
- If the accumulated matching (movedText) block lies
- before the current startOfText, or if it is small
- and lies far forward, it is made into a movedText record.
- But otherwise, it stays un-moved, and becomes the
- new startOfText.
-
- Blocks of contiguous words assembled during the insert
- state (i.e. no matching words in oldText) are also
- accumulated and result in insertedText records.
-
- DeletedText records are created later, after all text has
- been scanned, by collecting the leftovers in oldText,
- (see CollectDeletes below).
- */
- short ParseNewText( //returns number of inserted DiffRecs
- char* oldText,
- char* newText,
- DiffRec* diffs,
- ulong numSameHead,
- ulong numOldChars,
- ulong numNewChars,
- ulong maxDiffRecs)
- {
- Snippet* s;
- Snippet* canDel;
- Snippet* block;
- Snippet* startOfText=snippetStore;//null snippet
- Snippet* endOfText=
- NewSnippet(oldText+numSameHead+numOldChars);
- char* text=newText+numSameHead;
- char* markNew=text;
- char* t;
- DiffRec* diffPtr=diffs;
- ulong wordLength;
- ulong remainingNew=numNewChars;
- ulong remainingOld=numOldChars;
- long skippedChars; // signed !
- int state=inLimbo;
- char lastChar;
- char done=0;
-
- if (0==numNewChars) goto cleanup;
- lastChar=text[numNewChars-1];
- text[numNewChars-1]=0;
- do {
- t=text;
- NextWord;
- wordLength=t-text;
- if (0==*t) {
- *t=lastChar; //restore last char
- wordLength++;
- done=1;
- }
- if (0!=(s=FindAndRemoveSnippet(text,wordLength))) {
- s->type=movedText;
- remainingOld-=s->length;
- remainingNew-=s->length;
- switch (state) {
- case inMove:
- if (0==ExtendSnippet(block,s)) {
- skippedChars=block->oldTextRef-
- startOfText->oldTextRef-startOfText->length;
- if ((skippedChars<0) || BestGuess) {
- StartMoveRecord;
- EmitRecord;
- } else {
- MarkDeletedWords(block);
- startOfText=block;
- }
- block=s;
- }
- break;
- case inInsert:
- EmitRecord;
- case inLimbo:
- block=s;
- markNew=text;
- state=inMove;
- }
- } else {
- remainingNew-=wordLength;
- switch (state) {
- case inInsert:
- diffPtr->rangeEnd+=wordLength;
- break;
- case inMove:
- skippedChars=block->oldTextRef-
- startOfText->oldTextRef-startOfText->length;
- if ((skippedChars<0) || BestGuess) {
- StartMoveRecord;
- EmitRecord;
- } else {
- MarkDeletedWords(block);
- startOfText=block;
- }
- case inLimbo:
- StartInsertRecord;
- state=inInsert;
- }
- }
- text=t;
- } while (0==done);
-
- cleanup:
- switch (state) {
- case inMove:
- skippedChars=block->oldTextRef-
- startOfText->oldTextRef-startOfText->length;
- if (skippedChars<0) {
- StartMoveRecord;
- } else {
- MarkDeletedWords(endOfText);
- break;
- }
- case inInsert:
- EmitRecord;
- case inLimbo:
- MarkDeletedWords(endOfText);
- }
-
- return diffPtr-diffs;
- }
-
- /*The CollectDeletes function scans through the entire
- snippet array, and detects all snippets still marked
- as deletedText. Contiguous blocks of these are
- assembled to generate deletedText DiffRecs.
- */
- short CollectDeletes(
- DiffRec* diffs,
- char* oldText,
- char* newText,
- ulong maxDiffRecs)
- {
- DiffRec* diffPtr=diffs;
- Snippet* s=snippetStore;
- Snippet* block;
- ushort n=nextFreeSnippet;
- int state=inLimbo;
- while (n--) {
- if (s->length) {
- if (s->type==deletedText) {
- if (state==inLimbo) {
- block=s;
- state=inDelete;
- } else block->length+=s->length;
- } else if (state==inDelete) {
- StartDeleteRecord;
- EmitRecord;
- state=inLimbo;
- }
- }
- s++;
- }
- //cleanup:
- if (state==inDelete) {
- StartDeleteRecord;
- diffPtr++;
- }
-
- return diffPtr-diffs;
- }
-
- #if countWords
- /*This function could be used to tailor make the number of
- snippets in snippetStore to the exact number required
- */
- short WordCount(char* text,ulong numChars)
- {
- register char* t=text;
- short WC=1;
- char lastChar;
- if (numChars<=1) return numChars;
- lastChar=t[numChars-1];
- t[numChars-1]=0;
- do {
- NextWord;
- WC++;
- } while (*t);
- *t=lastChar;
- return WC;
- }
- #endif
-
- /*Null records can result from cancelling common parts
- of insert/delete pairs, for example insert " [The"
- and delete " The" would become just insert "[".
- The delete record would have a length of 0, i.e.
- a rangeEnd below rangeStart. If this occurred
- at the start of text, the value 0xFFFFFFFF would
- occur for rangeEnd, surely not a good thing.
- It is surely best to eliminate those null records.
- */
- void ExtractNullRecs(DiffRec* diffs,short numDiffRecs)
- {
- DiffRec* d=diffs+numDiffRecs;
- short i=numDiffRecs;
- short numMove;
- while (i--) {
- d--;
- if (d->type==nullRec) {
- DiffRec* dx=d+1;
- numDiffRecs--;
- numMove=numDiffRecs-i;
- while (numMove--) *d++=*dx++;
- numMove=i;
- }
- }
- }
-
- /*Since all snippets (and hence DiffRecs) refer to
- words which usually include leading white space,
- the might differ as such, but have common white
- space with different words, or vice versa.
- The ReduceInsDelPairs attempts to match pairs
- at the same text location, and remove their
- common parts.
-
- The loop relies on the existing order of records,
- as created by ParseNewText, followed by CollectCeletes.
- This means delete records are at the end, and all
- are in the order of the text. Ins and Del records
- are compared if they apply to the same insertion/
- deletion point, and share common words or white space
- at either end of the block. The aggregation of blocks
- hurts sometimes, in that redundancies in the middle
- will not be found here.
- */
- short ReduceInsDelPairs(
- DiffRec* diffs,
- short numDiffRecs,
- char* oldText,
- char* newText)
- {
- DiffRec* dRec=diffs+numDiffRecs-1;
- DiffRec* iRec=dRec;
- short eliminated=0;
- while (iRec>diffs) {
- iRec--;
- if (insertedText==iRec->type) break;
- }
- while (dRec>diffs) {
- top_of_loop:
- if (deletedText!=dRec->type) break;
- if (dRec->rangeStart>iRec->position) {
- dRec--;
- continue;
- }
- if (dRec->rangeStart==iRec->position) {
- //take care of disposable common chars at START of block
- do {
- ulong dStart=dRec->rangeStart;
- ulong iStart=iRec->rangeStart;
- ulong n=0;
- ulong n0=0;
- //take care of common white space:
- while (oldText[n+dStart]==newText[n+iStart]) {
- if (isAlpha(oldText[n+dStart])) break;
- n++;
- }
- //take care of common alpha but it must be a whole word:
- n0=n;
- while (oldText[n+dStart]==newText[n+iStart]) {
- if (isAlpha(oldText[n+dStart])) {
- n++;
- if (0==isAlpha(oldText[n+dStart])) {//success
- n0=n;
- break;
- }
- }
- }
- if (n0) {
- dRec->position+=dStart-dRec->rangeStart+n0;
- dRec->rangeStart=dStart+n0;
- iRec->position+=iStart-iRec->rangeStart+n0;
- iRec->rangeStart=iStart+n0;
- } else break;
- } while (oldText[dRec->rangeStart]==
- newText[iRec->rangeStart]);
-
- //take care of disposable common chars at END of block
- do {
- ulong dEnd=dRec->rangeEnd;
- ulong iEnd=iRec->rangeEnd;
- ulong n=0;
- ulong n0=0;
-
- //take care of common alpha but it must be a whole word:
- while (oldText[dEnd-n]==newText[iEnd-n]) {
- if (isAlpha(oldText[dEnd-n])) {
- n++;
- if (0==isAlpha(oldText[dEnd-n])) {//success
- n0=n;
- break;
- }
- }
- }
- n=n0;
- //take care of common white space:
- while (oldText[dEnd-n]==newText[iEnd-n]) {
- if (isAlpha(oldText[dEnd-n])) break;
- n++;
- }
- if (n) {
- iRec->rangeEnd=iEnd-n;
- dRec->rangeEnd=dEnd-n;
- } else break;
- } while (oldText[dRec->rangeEnd]==
- newText[iRec->rangeEnd]);
- if ((long)(dRec->rangeStart)>(long)(dRec->rangeEnd)) {
- dRec->type=nullRec;
- eliminated++;
- }
- if ((long)(iRec->rangeStart)>(long)(iRec->rangeEnd)) {
- iRec->type=nullRec;
- eliminated++;
- }
- } else while (iRec>diffs) { //find next lower iRec
- iRec--;
- if (insertedText==iRec->type) goto top_of_loop;
- }
- dRec--;
- }
- if (eliminated) ExtractNullRecs(diffs,numDiffRecs);
-
- return eliminated;
- }
-
- /*The FindWordDifferences function is primarily a
- collection of calls to the other routines.
- In addition, it tries to eliminate parts of the texts
- from processing which are completely identical at the
- start or the tail ends of the texts.
- If an insufficient number of DiffRecs was allocated,
- and maxDiffRecs is reached while processing the texts,
- the function returns this value without further ado.
- */
- short FindWordDifferences (
- char *oldText, /* pointer to old version of text */
- char *newText, /* pointer to new version of text */
- ulong numOldChars, /* number of characters in oldText */
- ulong numNewChars, /* number of characters in newText */
- DiffRec diffs[], /* pointer to preallocated array
- where text differences are to be stored */
- ulong maxDiffRecs /* number of DiffRecs preallocated */
- )
- {
- ulong numSameHead,numSameTail;
- long numUniqueOldChars;
- long numUniqueNewChars;
- short numDiffRecs=0;
- register char* OTx=oldText;
- register char* NTx=newText;
- register char* OTlimit;
- long deltaChars=numOldChars-numNewChars;
-
- if (deltaChars>0) OTlimit=oldText+numNewChars;
- else OTlimit=oldText+numOldChars;
-
- //check for common head:
- while (OTx<OTlimit) {
- if (*OTx != *NTx) break;
- OTx++;
- NTx++;
- }
-
- if ((isAlpha(*OTx)) || (isAlpha(*NTx))) {
- //backtrack to start of word
- while ((OTx>oldText) && (isAlpha(OTx[-1]))) OTx--;
- if (OTx>oldText) OTx--; //grab previous WSP
- }
-
- numSameHead=OTx-oldText;
-
- //check for common tail:
- OTx=oldText+numOldChars-1;
- NTx=newText+numNewChars-1;
- if (deltaChars>0) OTlimit=oldText+deltaChars;
- else OTlimit=oldText;
- while (OTx>OTlimit) {
- if (*OTx != *NTx) break;
- OTx--;
- NTx--;
- }
-
- if ((isAlpha(*OTx)) || (isAlpha(*NTx))) {
- //track to end of word
- while ((OTx<oldText+numOldChars-1)
- && (isAlpha(OTx[1]))) OTx++;
- }
- numSameTail=oldText-OTx+numOldChars-1;
-
- numUniqueOldChars=numOldChars-numSameTail-numSameHead;
- numUniqueNewChars=numNewChars-numSameTail-numSameHead;
-
- /*These values may be negative!
- If both are negative, old and new texts are identical,
- and the function can return immediately.
- */
- if ((numUniqueOldChars<0) && (numUniqueNewChars<0))
- return 0;
-
- #if countWords
- GetSnippetStore(3+
- WordCount(oldText+numSameHead,numUniqueOldChars));
- #else
- GetSnippetStore(3+
- numUniqueOldChars/averageWordSize);
- #endif
- if (numUniqueOldChars>0)
- ParseOldText(oldText,numSameHead,numUniqueOldChars);
-
- numDiffRecs+=
- ParseNewText(oldText,newText,diffs,
- numSameHead,numUniqueOldChars,numUniqueNewChars,
- maxDiffRecs);
-
- numDiffRecs+=
- CollectDeletes(&diffs[numDiffRecs],oldText,newText,
- maxDiffRecs-numDiffRecs);
-
- numDiffRecs-=
- ReduceInsDelPairs(diffs,numDiffRecs,oldText,newText);
-
- free(snippetStore);
-
- return numDiffRecs;
- }
-